#!/usr/bin/env python3
# -*- coding:utf8 -*-
from colossalai.amp import AMP_TYPE

# Per-step global batch size fed to the dataloader / engine.
BATCH_SIZE = 32
# Total number of passes over the training set.
NUM_EPOCHS = 3
# Number of micro-batches the pipeline engine splits each batch into.
# NOTE(review): presumably each global batch must be evenly divisible into
# micro-batches when pipeline parallelism is enabled — with BATCH_SIZE = 32
# a value of 100 looks suspicious; verify against the Colossal-AI docs.
NUM_MICRO_BATCHES = 100
# Optional toggle: accumulate gradients over this many steps before an
# optimizer update (disabled here).
# gradient_accumulation = 128



# Automatic mixed-precision (AMP) settings picked up by the Colossal-AI
# initializer. AMP_TYPE.TORCH selects the PyTorch-native AMP backend;
# the commented alternative below switches to Colossal-AI's built-in
# naive FP16 backend (kept as a toggle — confirm semantics in the docs).
fp16 = {"mode": AMP_TYPE.TORCH}
# fp16 = {"mode": AMP_TYPE.NAIVE}

# parallel=dict(
#     data=1,
#     pipeline=1,
#     tensor=dict(size=8, mode='1d')
# )
